Confidence intervals around individual’s scores can be estimated using the Standard Error of Measurement. Dudek (1979) defined this as \(SEM = SD\sqrt{1 - \rho}\), where \(SD\) refers to the standard deviation of scores on the measure and \(\rho\) refers to the reliability of the measure. Intervals can then be formed as usual using \(CI = estimate ± SEM*1.96\).
# dependencies
library(tidyverse)
library(knitr)
library(kableExtra)
library(boot)
library(parallel)
library(bayestestR)
library(patchwork)
library(mdthemes)
library(lme4)
library(sjPlot)
library(emmeans)
library(ggstance)
library(janitor)
# library(merTools) called via merTools:: to avoid namespace collisions between MASS and dplyr
# function to round all numeric vars in a data frame
round_df <- function(df, n_digits = 3) {
df %>% mutate_if(is.numeric, janitor::round_half_up, digits = n_digits)
}
# standard error of measurement
SEM <- function(SD, reliability){
SD * sqrt(1 - reliability)
}
# table format in output
options(knitr.table.format = "html") # data with bootstrapped confidence intervals
data_estimates_D <- read_csv("../data/processed/data_estimates_D.csv") %>%
filter(method == "bca")
# data for SEM confidence intervals
data_reliability_internal <-
read_csv("../data/processed/data_D_scores_internal_consistency_permuted_estimates_trial_types.csv") |>
select(domain, trial_type, reliability = alpha)
data_scored <-
read_csv("../data/processed/data_estimates_D.csv") |>
filter(method == "bca") |>
select(unique_id,
domain,
trial_type,
estimate)data_sd <- data_scored |>
group_by(domain, trial_type) |>
summarize(SD = sd(estimate))
data_sem <-
full_join(data_reliability_internal, data_sd, by = c("domain", "trial_type")) |>
mutate(SEM = SEM(SD, reliability))
data_estimates_D_sem <- data_scored |>
left_join(data_sem, by = c("domain", "trial_type")) |>
dplyr::mutate(ci_lower = estimate - (SEM*1.96),
ci_upper = estimate + (SEM*1.96),
ci_width = ci_upper - ci_lower,
sig = ifelse((ci_lower > 0 & ci_upper > 0) |
(ci_lower < 0 & ci_upper < 0), TRUE, FALSE)) |>
round_df(2)Because widths are consistent within trial type and domain, only MAP widths are needed, not MAP-MAP widths as with bootstrapping.
Most probable estimate.
data_ci_widths <- data_estimates_D_sem %>%
select(domain, trial_type, ci_width) %>%
distinct(domain, trial_type, .keep_all = TRUE) %>%
mutate(domain = fct_rev(domain))
data_ci_widths %>%
do(point_estimate(.$ci_width, centrality = "MAP")) %>%
round_df(2) %>%
kable() %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)| MAP |
|---|
| 1.23 |
By domain and trial type
data_ci_widths %>%
pivot_wider(names_from = trial_type, values_from = ci_width) %>%
round_df(2) %>%
kable() %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)| domain | tt1 | tt2 | tt3 | tt4 |
|---|---|---|---|---|
| Stigma - ADHD | 1.10 | 1.10 | 1.15 | 1.09 |
| Personality - Agreeableness | 1.13 | 1.17 | 1.25 | 1.29 |
| Body image | 1.31 | 1.32 | 1.21 | 1.25 |
| Friend-Enemy | 1.12 | 1.18 | 1.20 | 1.18 |
| Gender (1) | 1.30 | 1.24 | 1.27 | 1.22 |
| Lincoln-Hitler | 1.11 | 1.11 | 1.11 | 1.13 |
| Race (1) | 1.22 | 1.25 | 1.21 | 1.28 |
| Religion | 1.09 | 1.28 | 1.21 | 1.28 |
| Clinton-Trump | 1.20 | 1.31 | 1.25 | 1.25 |
| Valenced words | 1.12 | 1.30 | 1.22 | 1.23 |
| Personality - Conscientiousness | 1.17 | 1.20 | 1.29 | 1.22 |
| Countries (1) | 1.19 | 1.30 | 1.28 | 1.26 |
| Countries (2) | 1.29 | 1.23 | 1.22 | 1.33 |
| Disgust (2) | 1.28 | 1.26 | 1.24 | 1.16 |
| Disgust (1) | 1.08 | 1.05 | 1.07 | 0.98 |
| Personality - Extraversion | 1.16 | 1.24 | 1.19 | 1.23 |
| Death (1) | 1.11 | 1.23 | 1.11 | 1.26 |
| Death (2) | 1.20 | 1.21 | 1.25 | 1.28 |
| Death (3) | 1.11 | 1.22 | 1.27 | 1.24 |
| Race (2) | 0.87 | 0.90 | 0.91 | 0.90 |
| Gender (2) | 1.21 | 1.22 | 1.22 | 1.28 |
| Personality - Neuroticism | 1.25 | 1.19 | 1.23 | 1.31 |
| Personality - Openness | 1.24 | 1.28 | 1.21 | 1.21 |
| Stigma - PTSD | 1.00 | 1.07 | 1.14 | 1.13 |
| Rich-Poor | 1.25 | 1.29 | 1.32 | 1.26 |
| Stigma - Schizophrenia | 1.11 | 1.06 | 1.05 | 1.09 |
| Sexuality (2) | 0.83 | 0.90 | 0.89 | 0.95 |
| Sexuality (1) | 0.92 | 1.00 | 0.95 | 0.98 |
| Shapes & colors (1) | 1.10 | 1.27 | 1.32 | 1.31 |
| Shapes & colors (2) | 1.14 | 1.35 | 1.09 | 1.06 |
| Shapes & colors (3) | 1.17 | 1.25 | 1.20 | 1.20 |
| Shapes & colors (4) | 1.18 | 1.24 | 1.14 | 1.22 |
| Shapes & colors (5) | 1.14 | 1.30 | 1.25 | 1.19 |
| Shapes & colors (6) | 1.09 | 1.27 | 1.34 | 1.19 |
| Shapes & colors (7) | 0.97 | 1.12 | 1.02 | 1.10 |
p_ci_widths <-
ggplot(data_ci_widths, aes(ci_width, domain)) +
geom_point(position = position_dodge(width = 0.8)) +
mdthemes::md_theme_linedraw() +
facet_wrap(~ trial_type, ncol = 4, nrow = 1) +
labs(x = "Highest probability (MAP) 95% CI width",
y = "") +
theme(legend.position = "top")
p_ci_widthsBy domain
p_cis_by_domain <-
data_estimates_D_sem %>%
mutate(domain = str_replace(domain, "Personality - ", "Big5: "),
domain = str_replace(domain, "Stigma - ", "Stigma: ")) %>%
arrange(estimate) %>%
group_by(domain) %>%
mutate(ordered_id = row_number()/n()) %>%
ungroup() %>%
ggplot() +
geom_linerange(aes(x = ordered_id, ymin = ci_lower, ymax = ci_upper, color = sig),
alpha = 1) +
geom_point(aes(ordered_id, estimate), size = 0.5, shape = "square") +
geom_hline(yintercept = 0, linetype = "dotted") +
mdthemes::md_theme_linedraw() +
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
legend.position = "top") +
scale_color_viridis_d(end = 0.6, direction = -1) +
xlab("Ranked participant") +
ylab("*D* score") +
labs(color = "95% CI excludes zero point") +
facet_wrap(~ domain, ncol = 5)
p_cis_by_domaindata_diff_zero <-
bind_rows(mutate(data_estimates_D, scoring_method = "*D* scores bootstrapped CIs"),
mutate(data_estimates_D_sem, scoring_method = "*D* scores SEM CIs")) %>%
mutate(domain = as.factor(domain),
trial_type = case_when(trial_type == "tt1" ~ "Trial type 1",
trial_type == "tt2" ~ "Trial type 2",
trial_type == "tt3" ~ "Trial type 3",
trial_type == "tt4" ~ "Trial type 4"),
trial_type = fct_relevel(trial_type, "Trial type 1", "Trial type 2", "Trial type 3", "Trial type 4")) %>%
group_by(domain, trial_type, scoring_method) %>%
summarize(proportion_diff_zero = mean(sig),
variance = plotrix::std.error(sig)^2,
.groups = "drop") %>%
# model cannot be run on zero variance or 0 or 1 logit, so offset by a minuscule amount
mutate(proportion_diff_zero_temp = case_when(proportion_diff_zero < 0.001 ~ 0.001,
proportion_diff_zero > 0.999 ~ 0.999,
TRUE ~ proportion_diff_zero),
proportion_diff_zero_logit = boot::logit(proportion_diff_zero_temp)) %>%
select(-proportion_diff_zero_temp) %>%
#filter(!(proportion_diff_zero == 0 & variance == 0)) %>%
mutate(variance = ifelse(variance == 0, 0.0001, variance))
# # save to disk
#write_csv(data_diff_zero, "../data/results/data_diff_zero_irap_d_vs_pi.csv")
# data_diff_zero <- read_csv("../data/results/data_diff_zero_irap_d_vs_pi.csv")p_diff_zero <-
data_diff_zero %>%
mutate(domain = fct_rev(factor(domain))) %>%
ggplot(aes(proportion_diff_zero, domain, color = scoring_method, shape = scoring_method)) +
geom_linerangeh(aes(xmin = proportion_diff_zero - sqrt(variance)*1.96,
xmax = proportion_diff_zero + sqrt(variance)*1.96),
position = position_dodge(width = 0.75)) +
geom_point(position = position_dodge(width = 0.75)) +
scale_shape_manual(labels = c("*D* scores bootstrapped CIs", "*D* scores SEM CIs"), values = c(15, 16)) +
scale_color_viridis_d(begin = 0.3, end = 0.7, labels = c("*D* scores bootstrapped CIs", "*D* scores SEM CIs")) +
mdthemes::md_theme_linedraw() +
facet_wrap(~ trial_type, ncol = 4) +
labs(x = "Proportion of scores<br/>different from zero point",
y = "",
color = "Scoring method",
shape = "Scoring method") +
theme(legend.position = "top",
panel.spacing = unit(1.5, "lines")) +
coord_cartesian(xlim = c(0,1))
p_diff_zero# fit model
fit_diff_zero <-
lmer(proportion_diff_zero_logit ~ 1 + scoring_method + (1 | domain/trial_type),
weights = 1/variance,
data = data_diff_zero,
# solution from https://www.metafor-project.org/doku.php/tips:rma_vs_lm_lme_lmer
control = lmerControl(check.nobs.vs.nlev = "ignore",
check.nobs.vs.nRE = "ignore"))
# extract marginal means
results_emm_diff_zero <-
summary(emmeans(fit_diff_zero, ~ scoring_method)) %>%
dplyr::select(scoring_method, estimate = emmean, se = SE, ci_lower = lower.CL, ci_upper = upper.CL)
# extract re Tau
results_re_tau_diff_zero <- fit_diff_zero %>%
merTools::REsdExtract() %>%
as_tibble(rownames = "trial_type") %>%
rename(tau = value)
# combine
results_diff_zero <- results_emm_diff_zero %>%
# as in metafor package's implementation of prediction intervals, see metafor::predict.rma.R
mutate(pi_lower = estimate - (1.96 * sqrt(se^2 + results_re_tau_diff_zero$tau[2]^2)), # [2] is variance for domain
pi_upper = estimate + (1.96 * sqrt(se^2 + results_re_tau_diff_zero$tau[2]^2))) |>
select(-se) |>
mutate_if(is.numeric, boot::inv.logit)
# plot
p_prop_nonzero <-
ggplot(results_diff_zero, aes(scoring_method, estimate,
#color = scoring_method,
#shape = scoring_method,
#group = scoring_method
)) +
geom_linerange(aes(ymin = pi_lower, ymax = pi_upper), size = 0.5, position = position_dodge(width = 0.8), linetype = "dotted") +
geom_linerange(aes(ymin = ci_lower, ymax = ci_upper), size = 1.25, position = position_dodge(width = 0.8)) +
geom_point(position = position_dodge(width = 0.8), size = 2.5) +
mdthemes::md_theme_linedraw() +
scale_y_continuous(breaks = c(0, .25, .5, .75, 1), labels = c("0.00<br/>(Worse)", "0.25", "0.50", "0.75", "1.00<br/>(Better)")) +
#scale_color_viridis_d(alpha = 1, begin = 0.3, end = 0.7, labels = c("*D* scores", "PI scores")) +
#scale_shape_manual(labels = c("*D* scores", "PI scores"), values = c(15, 16)) +
scale_x_discrete(labels = c("IRAP D scores bootstrapped CIs", "IRAP D scores SEM CIs")) +
labs(x = "",
y = "Proportion of scores<br/>different from zero point<br/>") +
theme(legend.position = "none") +
coord_flip(ylim = c(0, 1))
p_prop_nonzeroresults_diff_zero %>%
round_df(2) %>%
kable() %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)| scoring_method | estimate | ci_lower | ci_upper | pi_lower | pi_upper |
|---|---|---|---|---|---|
| D scores bootstrapped CIs | 0.10 | 0.07 | 0.14 | 0.01 | 0.46 |
| D scores SEM CIs | 0.12 | 0.08 | 0.16 | 0.02 | 0.51 |
# tests
data_emms_diff_zero <- emmeans(fit_diff_zero, list(pairwise ~ scoring_method), adjust = "holm")
summary(data_emms_diff_zero)$`pairwise differences of scoring_method` %>%
as.data.frame() %>%
select(comparison = 1, p.value) %>%
mutate(p.value = ifelse(p.value < .001, "< .001", round_half_up(p.value, 3))) %>%
kable() %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)| comparison | p.value |
|---|---|
| (D scores bootstrapped CIs) - (D scores SEM CIs) | < .001 |
Within domain and trial type.
Many have argued that the zero point is arbitrary and not a useful reference point. Instead of asking “what proportion of D/PI scores are different from zero?”, we could also ask “what proportion of D/PI scores are different from one another?”
A common way to assess whether for differences between two estimates is to assess for non overlap between their confidence intervals. However, it has been repeatedly pointed out that this is less than ideal: there are situations where confidence intervals overlap slightly and yet the difference in means is significant.
Cornell Statistical Consulting Unit (2008) Overlapping Confidence Intervals and Statistical Significance argue this clearly. From their whitepaper:
The null hypothesis of zero mean difference is rejected when
\(|x_1 - x_2| > t \times \sqrt{SE_1^2 + SE_2^2}\)
The individual confidence intervals do not overlap when
\(|x_1 - x_2| > t \times (SE_1 + SE_2)\)
It can be shown that the following is always true:
\(\sqrt{SE_1^2 + SE_2^2} \le (SE_1 + SE_2)\)
This means that as \(|x_1 - x_2|\) increases there will be a point at which there is a significant difference between the means, but where the confidence intervals still overlaps. I.e., non overlapping confidence intervals indicate differences, but partially overlapping intervals do not exclude that there being differences.
As such, it is more appropriate and liberal to test for differences between each score and every other score (within the same domain and trial type) based on the CI of the difference scores rather than the non-overlap of intervals of the pair of scores.
# # discriminability using non-overlap of CIs
# discriminability <- function(data, i) {
# data_with_indexes <- data[i,] # boot function requires data and index
# ci_lower <- data_with_indexes$ci_lower
# ci_upper <- data_with_indexes$ci_upper
# n_ci_lower <- length(ci_lower)
# n_ci_upper <- length(ci_upper)
# r_ci_lower <- sum(rank(c(ci_lower, ci_upper))[1:n_ci_lower])
# A <- (r_ci_lower / n_ci_lower - (n_ci_lower + 1) / 2) / n_ci_upper
# return(A)
# }
# discriminatory using the significance of the difference score
# the goal here is to assess mean_diff > 1.96 * sqrt(SE1^2 + SE2^2 for every possible comparison EXCLUDING self comparisons. This is tricky to do within a typical tidyverse workflow as it means doing mutates involving each row of a column and every other row of that column but not the same row.
# the below solution is to use expand.grid to find all combinations of a row with itself, and then use the modulus of the length of the row to filter out the self-pairings. Then do mutates on the rows to assess significant differences. It's enough to then summarize the proportion of significant results across all participants.
discriminability <- function(data, i) {
data_with_indexes <- data[i,] # boot function requires data and index
grid_estimates <- expand.grid(data_with_indexes$estimate, data_with_indexes$estimate) |>
mutate(diff = Var1 - Var2,
row_number = row_number(),
modulus = row_number %% (nrow(data_with_indexes)+1)) |>
filter(modulus != 1) |>
select(diff)
grid_se <- expand.grid(data_with_indexes$se, data_with_indexes$se) |>
mutate(critical_value = 1.96 * sqrt(Var1^2 + Var2^2),
row_number = row_number(),
modulus = row_number %% (nrow(data_with_indexes)+1)) |>
filter(modulus != 1) |>
select(critical_value)
proportion_sig_diff <-
bind_cols(grid_estimates, grid_se) |>
mutate(sig = abs(diff) > critical_value) |>
summarize(proportion_sig_diff = mean(sig)) |>
pull(proportion_sig_diff)
return(proportion_sig_diff)
}
bootstrap_discriminability <- function(data){
require(dplyr)
require(boot)
fit <-
boot::boot(data = data,
statistic = discriminability,
R = 2000,
sim = "ordinary",
stype = "i",
parallel = "multicore",
ncpus = parallel::detectCores())
results <- boot::boot.ci(fit, conf = 0.95, type = "perc") # bca bootstraps throw an error, so use next best
output <-
tibble(
estimate = fit$t0,
ci_lower = results$percent[4],
ci_upper = results$percent[5]
)
return(output)
}data_discriminability_D <- read_csv("../data/results/data_discriminability_D.csv") %>%
filter(method == "bca")# bootstrapping has a long execution time, so load saved values if they've already been calculated
if(file.exists("../data/results/data_discriminability_D_sem.csv")) {
data_discriminability_D_sem <- read_csv("../data/results/data_discriminability_D_sem.csv") %>%
filter(method == "SEM")
} else {
# bootstrap D scores
data_discriminability_D_sem <- data_estimates_D_sem |>
mutate(method = "SEM",
se = (ci_upper - ci_lower)/(1.96*2)) |>
select(unique_id, method, domain, trial_type, estimate, se) |>
group_by(method, domain, trial_type) |>
do(bootstrap_discriminability(data = .)) |>
ungroup() |>
#filter(method == "bca") |>
rename(proportion_discriminable = estimate) |>
mutate(variance = ((ci_upper - ci_lower)/(1.96*2))^2,
domain = as.factor(domain),
trial_type = fct_relevel(trial_type, "tt1", "tt2", "tt3", "tt4"),
scoring_method = "*D* scores SEM CIs")
# save to disk
write_csv(data_discriminability_D_sem, "../data/results/data_discriminability_D_sem.csv")
data_discriminability_D_sem <- data_discriminability_D_sem %>%
filter(method == "SEM")
}# combine
data_discriminability_combined <-
bind_rows(
mutate(data_discriminability_D, scoring_method = "IRAP D scores bootstrapped CIs"),
mutate(data_discriminability_D_sem, scoring_method = "IRAP D scores SEM CIs")
) %>%
mutate(trial_type = case_when(trial_type == "tt1" ~ "Trial type 1",
trial_type == "tt2" ~ "Trial type 2",
trial_type == "tt3" ~ "Trial type 3",
trial_type == "tt4" ~ "Trial type 4")) %>%
#filter(!(proportion_discriminable == 0 & variance == 0)) %>%
mutate(variance = ifelse(variance == 0, 0.0001, variance)) |>
# model cannot be run on zero variance or 0 or 1 logit, so offset by a minuscule amount
mutate(
proportion_discriminable_temp = case_when(proportion_discriminable < 0.001 ~ 0.001,
proportion_discriminable > 0.999 ~ 0.999,
TRUE ~ proportion_discriminable),
proportion_discriminable_logit = boot::logit(proportion_discriminable_temp)
) %>%
select(-proportion_discriminable_temp)
p_discriminability <-
data_discriminability_combined %>%
mutate(domain = fct_rev(factor(domain))) %>%
ggplot(aes(proportion_discriminable, domain, color = scoring_method, shape = scoring_method)) +
geom_linerangeh(aes(xmin = proportion_discriminable - sqrt(variance)*1.96,
xmax = proportion_discriminable + sqrt(variance)*1.96),
position = position_dodge(width = 0.75)) +
geom_point(position = position_dodge(width = 0.75)) +
scale_shape_manual(labels = c("IRAP D scores bootstrapped CIs", "IRAP D scores SEM CIs"), values = c(15, 16)) +
scale_color_viridis_d(begin = 0.3, end = 0.7, labels = c("IRAP D scores bootstrapped CIs", "IRAP D scores SEM CIs")) +
mdthemes::md_theme_linedraw() +
facet_wrap(~ trial_type, ncol = 4) +
labs(x = "Proportion of scores<br/>differerent from other scores",
y = "",
color = "Scoring method",
shape = "Scoring method") +
theme(legend.position = "top",
panel.spacing = unit(1.5, "lines")) +
coord_cartesian(xlim = c(0,1))
p_discriminability# fit meta analytic model
fit_disciminability <-
lmer(proportion_discriminable_logit ~ 1 + scoring_method + (1 | domain/trial_type),
weights = 1/variance,
data = data_discriminability_combined,
# solution from https://www.metafor-project.org/doku.php/tips:rma_vs_lm_lme_lmer
control = lmerControl(check.nobs.vs.nlev = "ignore",
check.nobs.vs.nRE = "ignore"))
# extract marginal means
results_emm_disciminability <-
summary(emmeans(fit_disciminability, ~ scoring_method)) %>%
dplyr::select(scoring_method, estimate = emmean, se = SE, ci_lower = lower.CL, ci_upper = upper.CL)
# extract re Tau
results_re_tau_disciminability <- fit_disciminability %>%
merTools::REsdExtract() %>%
as_tibble(rownames = "trial_type") %>%
rename(tau = value)
# combine
results_disciminability <- results_emm_disciminability %>%
mutate(pi_lower = estimate - (1.96 * sqrt(se^2 + results_re_tau_disciminability$tau[2]^2)), # as in metafor package's implementation of credibility intervals, see metafor::predict.rma.R
pi_upper = estimate + (1.96 * sqrt(se^2 + results_re_tau_disciminability$tau[2]^2))) |>
select(-se) |>
mutate_if(is.numeric, boot::inv.logit)
# plot
p_prop_discriminable <-
ggplot(results_disciminability, aes(scoring_method, estimate,
#color = scoring_method,
#shape = scoring_method,
#group = scoring_method
)) +
geom_linerange(aes(ymin = pi_lower, ymax = pi_upper), size = 0.5, position = position_dodge(width = 0.8), linetype = "dotted") +
geom_linerange(aes(ymin = ci_lower, ymax = ci_upper), size = 1.25, position = position_dodge(width = 0.8)) +
geom_point(position = position_dodge(width = 0.8), size = 2.5) +
scale_y_continuous(breaks = c(0, .25, .5, .75, 1), labels = c("0.00<br/>(Worse)", "0.25", "0.50", "0.75", "1.00<br/>(Better)")) +
#scale_shape_manual(labels = c("*D* scores", "PI scores"), values = c(15, 16)) +
#scale_color_viridis_d(begin = 0.3, end = 0.7, labels = c("*D* scores", "PI scores")) +
scale_x_discrete(labels = c("IRAP D scores bootstrapped CIs", "IRAP D scores SEM CIs")) +
mdthemes::md_theme_linedraw() +
labs(x = "",
y = "Proportion of scores<br/>differerent from other scores<br/>") +
theme(legend.position = "none") +
coord_flip(ylim = c(0, 1))
p_prop_discriminable results_disciminability %>%
round_df(2) %>%
kable() %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)| scoring_method | estimate | ci_lower | ci_upper | pi_lower | pi_upper |
|---|---|---|---|---|---|
| IRAP D scores bootstrapped CIs | 0.10 | 0.08 | 0.12 | 0.04 | 0.23 |
| IRAP D scores SEM CIs | 0.09 | 0.07 | 0.11 | 0.04 | 0.21 |
# tests
data_emms_disciminability <- emmeans(fit_disciminability, list(pairwise ~ scoring_method), adjust = "holm")
summary(data_emms_disciminability)$`pairwise differences of scoring_method` %>%
as.data.frame() %>%
select(comparison = 1, p.value) %>%
mutate(p.value = ifelse(p.value < .001, "< .001", round_half_up(p.value, 3))) %>%
kable() %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)| comparison | p.value |
|---|---|
| IRAP D scores bootstrapped CIs - IRAP D scores SEM CIs | < .001 |
NB observed range of confidence intervals
# max range as example
data_estimates_D_sem %>%
dplyr::summarize(min = min(ci_lower, na.rm = TRUE),
max = max(ci_upper, na.rm = TRUE),
.groups = "drop") %>%
mutate(range = max - min) %>%
kable() %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)| min | max | range |
|---|---|---|
| -1.84 | 2.09 | 3.93 |
## calculate observed ranges
observed_range_estimates_D <- data_estimates_D %>%
group_by(domain, trial_type) %>%
dplyr::summarize(min = min(ci_lower, na.rm = TRUE),
max = max(ci_upper, na.rm = TRUE),
.groups = "drop") %>%
mutate(range = max - min)
observed_range_estimates_D_sem <- data_estimates_D_sem %>%
group_by(domain, trial_type) %>%
dplyr::summarize(min = min(ci_lower, na.rm = TRUE),
max = max(ci_upper, na.rm = TRUE),
.groups = "drop") %>%
mutate(range = max - min)
# calculate CI / range
data_ci_width_proportions_D <- data_estimates_D %>%
# join this data into the original data
full_join(observed_range_estimates_D, by = c("domain", "trial_type")) %>%
# calculate ci width as a proportion of observed range
mutate(ci_width_proportion = ci_width / range) %>%
mutate(scoring_method = "*D* scores bootstrapped CIs")
data_ci_width_proportions_D_sem <- data_estimates_D_sem %>%
# join this data into the original data
full_join(observed_range_estimates_D_sem, by = c("domain", "trial_type")) %>%
# calculate ci width as a proportion of observed range
mutate(ci_width_proportion = ci_width / range) %>%
mutate(scoring_method = "*D* scores SEM CIs")
# combine
data_ci_width_proportions_combined <-
bind_rows(data_ci_width_proportions_D,
data_ci_width_proportions_D_sem) %>%
mutate(domain = as.factor(domain),
trial_type = fct_relevel(trial_type, "tt1", "tt2", "tt3", "tt4")) %>%
group_by(scoring_method, domain, trial_type) %>%
summarize(ci_width_proportion_mean = mean(ci_width_proportion),
variance = plotrix::std.error(ci_width_proportion)^2) %>%
ungroup() %>%
mutate(variance = ifelse(variance == 0, 0.0001, variance)) %>%
# logit transform
mutate(ci_width_proportion_mean_temp = case_when(ci_width_proportion_mean < 0.0001 ~ 0.0001,
ci_width_proportion_mean > 0.9999 ~ 0.9999,
TRUE ~ ci_width_proportion_mean),
ci_width_proportion_mean_logit = boot::logit(ci_width_proportion_mean_temp)) %>%
select(-ci_width_proportion_mean_temp)
write_csv(data_ci_width_proportions_combined, "../data/results/data_ci_width_proportions_irap_d_boot_vs_sem.csv")
#data_ci_width_proportions_combined <- read_csv("../data/results/data_ci_width_proportions_irap_d_boot_vs_sem.csv")p_coverage <-
data_ci_width_proportions_combined %>%
mutate(domain = fct_rev(factor(domain))) %>%
mutate(trial_type = case_when(trial_type == "tt1" ~ "Trial type 1",
trial_type == "tt2" ~ "Trial type 2",
trial_type == "tt3" ~ "Trial type 3",
trial_type == "tt4" ~ "Trial type 4")) %>%
ggplot(aes(ci_width_proportion_mean, domain, color = scoring_method, shape = scoring_method)) +
geom_point(position = position_dodge(width = 0.75)) +
scale_shape_manual(labels = c("*D* scores boostrapped CIs", "*D* scores SEM CIs"), values = c(15, 16)) +
geom_linerangeh(aes(xmin = ci_width_proportion_mean - sqrt(variance)*1.96,
xmax = ci_width_proportion_mean + sqrt(variance)*1.96),
position = position_dodge(width = 0.75)) +
scale_color_viridis_d(begin = 0.3, end = 0.7, labels = c("*D* scores boostrapped CIs", "*D* scores SEM CIs")) +
mdthemes::md_theme_linedraw() +
facet_wrap(~ trial_type, ncol = 4) +
labs(x = "Proportion of observed range covered<br/>by individual scores' 95% CIs",
y = "",
color = "Scoring method",
shape = "Scoring method") +
theme(legend.position = "top",
panel.spacing = unit(1.5, "lines")) +
coord_cartesian(xlim = c(0,1))
p_coverage# fit model
fit_ci_width_proportions <-
lmer(ci_width_proportion_mean_logit ~ 1 + scoring_method + (1 | domain/trial_type),
weights = 1/variance,
data = data_ci_width_proportions_combined,
# solution from https://www.metafor-project.org/doku.php/tips:rma_vs_lm_lme_lmer
control = lmerControl(check.nobs.vs.nlev = "ignore",
check.nobs.vs.nRE = "ignore"))
# extract marginal means
results_emm_ci_width_proportions <-
summary(emmeans(fit_ci_width_proportions, ~ scoring_method)) %>%
dplyr::select(scoring_method, estimate = emmean, se = SE, ci_lower = lower.CL, ci_upper = upper.CL)
# extract re Tau
results_re_tau_ci_width_proportions <-
merTools::REsdExtract(fit_ci_width_proportions) %>%
as_tibble(rownames = "trial_type") %>%
rename(tau = value)
# combine
results_ci_width_proportions <- results_emm_ci_width_proportions %>%
mutate(pi_lower = estimate - (1.96 * sqrt(se^2 + results_re_tau_ci_width_proportions$tau[2]^2)), # as in metafor package's implementation of credibility intervals, see metafor::predict.rma.R
pi_upper = estimate + (1.96 * sqrt(se^2 + results_re_tau_ci_width_proportions$tau[2]^2))) %>%
select(-se) %>%
mutate_if(is.numeric, boot::inv.logit)
# plot
p_ci_width_proportion_observed_range <-
ggplot(results_ci_width_proportions, aes(scoring_method, estimate,
#color = scoring_method,
#shape = scoring_method,
#group = scoring_method
)) +
geom_linerange(aes(ymin = pi_lower, ymax = pi_upper), size = 0.5, position = position_dodge(width = 0.8), linetype = "dotted") +
geom_linerange(aes(ymin = ci_lower, ymax = ci_upper), size = 1.25, position = position_dodge(width = 0.8)) +
geom_point(position = position_dodge(width = 0.8), size = 2.5) +
scale_shape_discrete(labels = c("IRAP D scores bootstrapped CIs", "IRAP D scores SEM CIs")) +
scale_y_continuous(breaks = c(0, .25, .5, .75, 1), labels = c("0.00<br/>(Better)", "0.25", "0.50", "0.75", "1.00<br/>(Worse)")) +
#scale_shape_manual(labels = c("*D* scores", "PI scores"), values = c(15, 16)) +
#scale_color_viridis_d(begin = 0.3, end = 0.7, labels = c("*D* scores", "PI scores")) +
scale_x_discrete(labels = c("IRAP D scores bootstrapped CIs", "IRAP D scores SEM CIs")) +
mdthemes::md_theme_linedraw() +
labs(x = "",
y = "Proportion of observed range covered<br/>by individual scores' 95% CIs") +
theme(legend.position = "none") +
coord_flip(ylim = c(0, 1))
p_ci_width_proportion_observed_rangeresults_ci_width_proportions %>%
round_df(2) %>%
kable() %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)| scoring_method | estimate | ci_lower | ci_upper | pi_lower | pi_upper |
|---|---|---|---|---|---|
| D scores bootstrapped CIs | 0.51 | 0.50 | 0.53 | 0.43 | 0.60 |
| D scores SEM CIs | 0.45 | 0.43 | 0.46 | 0.36 | 0.53 |
# tests
data_emms_ci_width_proportions <- emmeans(fit_ci_width_proportions, list(pairwise ~ scoring_method), adjust = "holm")
summary(data_emms_ci_width_proportions)$`pairwise differences of scoring_method` %>%
as.data.frame() %>%
select(comparison = 1, p.value) %>%
mutate(p.value = ifelse(p.value < .001, "< .001", round_half_up(p.value, 3))) %>%
kable() %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)| comparison | p.value |
|---|---|
| (D scores bootstrapped CIs) - (D scores SEM CIs) | < .001 |
p_cis_by_domainggsave(filename = "plots/supplementary_figure_7S_cis_by_domain_irap_d_boot_vs_sem.pdf",
plot = p_cis_by_domain,
device = "pdf",
# path = NULL,
# dpi = 300,
units = "in",
width = 10,
height = 10,
limitsize = TRUE)p_combined <-
p_prop_nonzero +
p_prop_discriminable +
p_ci_width_proportion_observed_range +
plot_layout(ncol = 1) #, guides = "collect") & theme(legend.position = "top")
p_combinedggsave(filename = "plots/supplementary_figure_8_metaanalyses_irap_d_boot_vs_sem.pdf",
plot = p_combined,
device = "pdf",
# path = NULL,
# dpi = 300,
units = "in",
width = 5,
height = 5,
limitsize = TRUE)sessionInfo()## R version 4.2.1 (2022-06-23)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur ... 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_GB.UTF-8/en_GB.UTF-8/en_GB.UTF-8/C/en_GB.UTF-8/en_GB.UTF-8
##
## attached base packages:
## [1] parallel stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] janitor_2.1.0 ggstance_0.3.5 emmeans_1.7.5 sjPlot_2.8.10
## [5] lme4_1.1-30 Matrix_1.4-1 mdthemes_0.1.0 patchwork_1.1.1
## [9] bayestestR_0.13.0 boot_1.3-28 kableExtra_1.3.4 knitr_1.39
## [13] forcats_0.5.1 stringr_1.4.1 dplyr_1.0.9 purrr_0.3.4
## [17] readr_2.1.2 tidyr_1.2.0 tibble_3.1.8 ggplot2_3.3.6
## [21] tidyverse_1.3.2
##
## loaded via a namespace (and not attached):
## [1] readxl_1.4.0 backports_1.4.1 blme_1.0-5
## [4] systemfonts_1.0.4 splines_4.2.1 listenv_0.8.0
## [7] TH.data_1.1-1 digest_0.6.29 foreach_1.5.2
## [10] htmltools_0.5.3 fansi_1.0.3 magrittr_2.0.3
## [13] googlesheets4_1.0.0 tzdb_0.3.0 globals_0.15.1
## [16] modelr_0.1.8 vroom_1.5.7 sandwich_3.0-2
## [19] svglite_2.1.0 colorspace_2.0-3 rvest_1.0.2
## [22] textshaping_0.3.6 haven_2.5.0 xfun_0.31
## [25] crayon_1.5.1 jsonlite_1.8.0 survival_3.3-1
## [28] zoo_1.8-10 iterators_1.0.14 glue_1.6.2
## [31] gtable_0.3.0 gargle_1.2.0 webshot_0.5.3
## [34] sjstats_0.18.1 sjmisc_2.8.9 abind_1.4-5
## [37] scales_1.2.0 mvtnorm_1.1-3 DBI_1.1.3
## [40] ggeffects_1.1.2 Rcpp_1.0.9 plotrix_3.8-2
## [43] viridisLite_0.4.0 xtable_1.8-4 performance_0.9.1
## [46] merTools_0.5.2 gridtext_0.1.4 bit_4.0.4
## [49] datawizard_0.6.1 httr_1.4.3 ellipsis_0.3.2
## [52] pkgconfig_2.0.3 farver_2.1.1 sass_0.4.2
## [55] dbplyr_2.2.1 utf8_1.2.2 tidyselect_1.1.2
## [58] labeling_0.4.2 rlang_1.0.4 later_1.3.0
## [61] effectsize_0.7.0.5 munsell_0.5.0 cellranger_1.1.0
## [64] tools_4.2.1 cachem_1.0.6 cli_3.3.0
## [67] generics_0.1.3 sjlabelled_1.2.0 broom_1.0.0
## [70] evaluate_0.15 fastmap_1.1.0 arm_1.12-2
## [73] yaml_2.3.5 ragg_1.2.2 bit64_4.0.5
## [76] fs_1.5.2 future_1.27.0 nlme_3.1-157
## [79] mime_0.12 xml2_1.3.3 compiler_4.2.1
## [82] pbkrtest_0.5.1 rstudioapi_0.13 reprex_2.0.1
## [85] bslib_0.4.0 stringi_1.7.8 highr_0.9
## [88] parameters_0.18.2 lattice_0.20-45 nloptr_2.0.3
## [91] markdown_1.1 vctrs_0.4.1 pillar_1.8.0
## [94] lifecycle_1.0.1 furrr_0.3.0 jquerylib_0.1.4
## [97] estimability_1.4 insight_0.18.4 httpuv_1.6.5
## [100] R6_2.5.1 promises_1.2.0.1 parallelly_1.32.1
## [103] codetools_0.2-18 MASS_7.3-57 assertthat_0.2.1
## [106] withr_2.5.0 multcomp_1.4-20 broom.mixed_0.2.9.4
## [109] hms_1.1.1 ggtext_0.1.1 grid_4.2.1
## [112] coda_0.19-4 minqa_1.2.4 rmarkdown_2.14
## [115] snakecase_0.11.0 googledrive_2.0.0 shiny_1.7.2
## [118] lubridate_1.8.0